# Selected countries, identified by three-letter country code
country_list <- c(
  "CHN", "DEU", "IND", "JPN", "PRK",
  "PAK", "PSE", "SDN", "SYR", "USA"
)
# Life-expectancy table (country code, country name, life expectancy).
# `year` is coerced to numeric after reading.
life_df <- mutate(
  read_csv("life_exp.csv", show_col_types = FALSE),
  year = as.numeric(year)
)

# Food-intake table: keep only rows whose `varnum` is below 15 and expose
# the `median` column under the name `intake`.
food_df <- read_csv(
  "Country-level Nutritions.csv",
  show_col_types = FALSE
) %>%
  filter(varnum < 15) %>%
  rename(intake = median)

# Lookup table: food code and food name
food_code <- read_csv("food_code.csv", show_col_types = FALSE)

# Country-code lookup table
country_code <- read_csv("country_code.csv", show_col_types = FALSE)
# Attach the life-expectancy columns (matched on country code and year) and
# the food names (matched on `varnum`) to every intake row.
# The left-hand table is supplied by the pipe only. Previously it was also
# passed as `x = food_df`, so the piped value was matched to the next free
# parameter of left_join() instead of `x`.
food_life <- food_df %>%
  left_join(life_df, by = c("iso3" = "Code", "year" = "year")) %>%
  left_join(food_code, by = "varnum")
# Box plot of 2018 intake per food, using only rows where age, female, urban
# and edu all equal 999 (presumably the "all groups" code -- confirm against
# the data dictionary). Foods are ordered by intake and the axes are flipped.
top_food_box <- food_life %>%
  filter(
    year == 2018,
    age == 999, female == 999, urban == 999, edu == 999
  ) %>%
  ggplot(aes(x = reorder(Food, intake), y = intake)) +
  geom_boxplot(aes(color = Food, group = Food, fill = Food), alpha = 0.7) +
  labs(
    title = "Food Intake Worldwide",
    caption = "Source:Global Dietary Database",
    y = "Average Food Daily Intake(g/d)",
    x = ""
  ) +
  theme_pander() +
  coord_flip()

# Interactive version of the box plot
ggplotly(top_food_box)
# Mean 2018 intake per food (rounded to two decimals), largest first,
# restricted to rows where age, female, urban and edu are all 999.
top_food <- food_life %>%
  filter(
    year == 2018,
    age == 999, female == 999, urban == 999, edu == 999
  ) %>%
  group_by(Food) %>%
  summarise(intake_avg = round(mean(intake), 2)) %>%
  arrange(desc(intake_avg))

# Searchable table of the averages
datatable(
  top_food,
  filter = list(position = "top"),
  rownames = FALSE
)
# Mean 2018 intake per food and sex. Rows are those where `female` is not
# 999 while age, urban and edu are 999; `female == 1` is labelled "female",
# anything else "male".
# `.groups = "drop"` returns an ungrouped table, so the result no longer
# carries an implicit grouping by Food and summarise() no longer emits its
# "has grouped output" message.
top_food_sex <- food_life %>%
  filter(
    year == 2018,
    age == 999, female != 999, urban == 999, edu == 999
  ) %>%
  mutate(sex = if_else(female == 1, "female", "male")) %>%
  group_by(Food, sex) %>%
  summarise(intake_avg = round(mean(intake), 2), .groups = "drop") %>%
  arrange(desc(intake_avg))

# Dodged horizontal bars: one pair (female / male) per food
ggplot(
  top_food_sex,
  aes(x = reorder(Food, intake_avg), y = intake_avg, fill = sex)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.5, alpha = 0.7) +
  labs(
    title = "Differences in Food Intake for Female & Male",
    caption = "Source:Global Dietary Database",
    y = "Average Food Daily Intake",
    x = ""
  ) +
  theme_pander() +
  coord_flip()
# Mean 2018 intake per food and area. Rows are those where `urban` is not
# 999 while age, female and edu are 999; `urban == 1` is labelled "urban",
# anything else "rural".
# na.rm = TRUE: the recorded output of the plot below reported bars removed
# for missing values, presumably because a single missing intake turned the
# whole group mean into NA. Missing intakes are now skipped; a group with no
# non-missing intake still yields NaN and is dropped by ggplot.
# `.groups = "drop"` returns an ungrouped table and silences the
# "has grouped output" message from summarise().
top_food_area <- food_life %>%
  filter(
    year == 2018,
    age == 999, female == 999, urban != 999, edu == 999
  ) %>%
  mutate(area = if_else(urban == 1, "urban", "rural")) %>%
  group_by(Food, area) %>%
  summarise(
    intake_avg = round(mean(intake, na.rm = TRUE), 2),
    .groups = "drop"
  ) %>%
  arrange(desc(intake_avg))

# Dodged horizontal bars: one pair (urban / rural) per food
ggplot(
  top_food_area,
  aes(x = reorder(Food, intake_avg), y = intake_avg, fill = area)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.5, alpha = 0.7) +
  labs(
    title = "Differences in Food Intake for Urban & Rural Areas",
    caption = "Source:Global Dietary Database",
    y = "Average Food Daily Intake",
    x = ""
  ) +
  theme_pander() +
  coord_flip()
## Warning: Removed 13 rows containing missing values (geom_bar).
# Average Food Intake Worldwide by Year
# Mean intake per year and food (rounded to two decimals), restricted to rows
# where age, female, urban and edu are all 999.
# `.groups = "drop"` returns an ungrouped table, so the result is no longer
# implicitly grouped by year and summarise() emits no grouping message.
top_year <- food_life %>%
  filter(age == 999, female == 999, urban == 999, edu == 999) %>%
  group_by(year, Food) %>%
  summarise(intake_avg = round(mean(intake), 2), .groups = "drop") %>%
  arrange(desc(intake_avg))

# One line per food across years
plot1 <- top_year %>%
  ggplot(aes(x = year, y = intake_avg, group = Food, color = Food)) +
  geom_line(size = 1) +
  geom_point(size = 1) +
  labs(
    x = "Year",
    y = "Intake Avg(g/day)",
    title = "Average Food Intake Worldwide"
  ) +
  theme_classic()

# Interactive version of the trend chart
ggplotly(plot1)
# Yearly intake of one food group, drawn as one coloured line per country.
#   data         table with columns varnum, age, female, urban, edu, year,
#                intake and Country (here: food_life)
#   food_varnum  value of `varnum` selecting the food group
#   title        panel title
# Only rows where age, female, urban and edu are all 999 are plotted.
# Returns the ggplot object.
plot_intake_trend <- function(data, food_varnum, title) {
  data %>%
    filter(
      varnum == !!food_varnum,
      age == 999, female == 999, urban == 999, edu == 999
    ) %>%
    ggplot(aes(x = year, y = intake, group = Country, color = Country)) +
    geom_line(size = 1) +
    geom_point(size = 2) +
    labs(x = "Year", y = "Intake (g/day)", title = title) +
    theme_classic()
}

# Panel `a` used to draw every country's line in a fixed 'steelblue', which
# overrode the Country colour mapping used by the other panels and by the
# shared legend. All five intake panels now share one definition.
# NOTE(review): the variable `c` shadows the name of base::c(); the name is
# kept in case later code refers to it.
a <- plot_intake_trend(food_life, 7, "Refined grains")
b <- plot_intake_trend(food_life, 2, "Non-starchy vegetables")
c <- plot_intake_trend(food_life, 1, "Fruits")
d <- plot_intake_trend(food_life, 3, "Potatoes")
e <- plot_intake_trend(food_life, 8, "Whole grains")

# Life expectancy over time, one line per country
lf <- food_life %>%
  filter(age == 999, female == 999, urban == 999, edu == 999) %>%
  ggplot(aes(x = year, y = life_exp, group = Country, color = Country)) +
  geom_line(size = 1, alpha = 0.7) +
  geom_point(size = 2) +
  labs(x = "Year", y = "(year-old)", title = "Life Expectancy") +
  theme_classic()

# 2 x 3 grid with a single legend placed below the panels
figure <- ggarrange(
  lf, a, b, c, d, e,
  ncol = 2, nrow = 3,
  common.legend = TRUE, legend = "bottom"
)
## Warning: Removed 364 row(s) containing missing values (geom_path).
## Warning: Removed 364 rows containing missing values (geom_point).
## Warning: Removed 364 row(s) containing missing values (geom_path).
## Warning: Removed 364 rows containing missing values (geom_point).
annotate_figure(
  figure,
  top = text_grob(
    "Life Expectancy & Food Intake",
    color = "black", face = "bold", size = 14
  )
)
# Life expectancy over time for the rows with iso3 == "CHN"
# (age, female, urban and edu all 999). The line is drawn in a fixed
# steel-blue; the points keep the Country colour mapping.
c_lf <- food_life %>%
  filter(
    iso3 == "CHN",
    age == 999, female == 999, urban == 999, edu == 999
  ) %>%
  ggplot(aes(x = year, y = life_exp, group = Country, color = Country)) +
  geom_line(size = 1, alpha = 0.7, color = "steelblue") +
  geom_point(size = 2) +
  labs(x = "Year", y = "(year-old)", title = "Life Expectancy") +
  theme_classic()

c_lf
# For each food code 1..14, print the intake-over-time chart of the rows
# with iso3 == "CHN"; the panel title is the food name from `food_code`.
for (food_id in seq_len(14)) {
  food <- food_code$Food[which(food_code$varnum == food_id)]

  chn <- food_life %>%
    filter(
      iso3 == "CHN",
      varnum == food_id,
      age == 999, female == 999, urban == 999, edu == 999
    ) %>%
    ggplot(aes(x = year, y = intake, group = Country, color = Country)) +
    geom_line(size = 1) +
    geom_point(size = 2) +
    labs(x = "Year", y = "Intake(g/day)", title = food) +
    theme_classic()

  print(chn)
}
# Mean life expectancy per year over all rows of life_df.
# `year` is stored as character in the summary table and converted back to
# numeric only for the x axis of the plot.
life_year <- life_df %>%
  group_by(year = as.character(year)) %>%
  summarise(avg_life = mean(life_exp))

plot_life_year <- ggplot(
  life_year,
  aes(x = as.numeric(year), y = avg_life)
) +
  geom_line(alpha = 0.7, size = 1, color = "steelblue") +
  geom_point(size = 2, alpha = 0.7) +
  labs(
    x = "Year",
    y = "life expectancy",
    title = " Average life expectancy Worldwide"
  ) +
  theme_classic()

# Interactive version of the chart
ggplotly(plot_life_year)
# 2018 slice of the joined table, restricted to rows where age, female,
# urban and edu are all 999
food_life2018 <- filter(
  food_life,
  year == 2018,
  age == 999, female == 999, urban == 999, edu == 999
)
# Countries ranked by 2018 life expectancy, highest first. distinct() is
# needed because food_life2018 holds one row per country and food.
top_life <- food_life2018 %>%
  select(country = Country, life_exp) %>%
  arrange(desc(life_exp)) %>%
  distinct()

# Bar chart of the first 20 rows of the ranking.
# head(20) replaces the `[0:20, ]` index: R silently ignores the 0, so the
# old code selected rows 1..20 only by accident, and head() also never asks
# for rows beyond the end of the table.
top_life %>%
  head(20) %>%
  ggplot(aes(x = reorder(country, life_exp, na.rm = TRUE), y = life_exp)) +
  theme_classic() +
  geom_bar(
    aes(fill = life_exp),
    position = "dodge", alpha = 0.8, stat = "identity"
  ) +
  coord_flip() +
  labs(
    y = "Life expectancy",
    x = "Country",
    title = "Life Expectancy Worldwide",
    fill = "life expectancy",
    caption = "Data from the World Bank"
  )
# For each food code 1..14, print a 2018 scatter plot of life expectancy
# against intake with a fitted straight line (lm); the panel title is the
# food name from `food_code`.
for (food_id in seq_len(14)) {
  food <- food_code$Food[which(food_code$varnum == food_id)]

  f_life <- food_life2018 %>%
    filter(varnum == food_id) %>%
    ggplot(aes(x = intake, y = life_exp)) +
    geom_point(size = 1) +
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(x = "intake(g/d)", y = "Life Expectancy", title = food) +
    theme_classic()

  print(f_life)
}
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).
# 2018 scatter plot of life expectancy against intake for one food code,
# with a fitted straight line (lm). Returns the ggplot object.
scatter_life_vs_intake <- function(food_varnum, title) {
  food_life2018 %>%
    filter(varnum == !!food_varnum) %>%
    ggplot(aes(x = intake, y = life_exp)) +
    geom_point(size = 1) +
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(x = "intake", y = "Life expectancy", title = title) +
    theme_classic()
}

p7 <- scatter_life_vs_intake(7, "Refined grains")
p2 <- scatter_life_vs_intake(2, "Non-starchy vegetables")
p1 <- scatter_life_vs_intake(1, "Fruits")
p3 <- scatter_life_vs_intake(3, "Potatoes")
p8 <- scatter_life_vs_intake(8, "Whole grains")

# Five panels laid out on a 2-column, 3-row grid
figure1 <- ggarrange(p7, p2, p1, p3, p8, ncol = 2, nrow = 3)
# Recorded output: for each of the five panels, stat_smooth and geom_point
# each warned that 4 rows with missing / non-finite values were removed.
annotate_figure(
  figure1,
  top = text_grob(
    "Life Expectancy & Food Intake",
    color = "red", face = "bold", size = 14
  )
)
# Countries ranked by 2018 cancer rate, highest first. distinct() is needed
# because food_life2018 holds one row per country and food.
top_cancer <- food_life2018 %>%
  select(country = Country, cancer_rate) %>%
  arrange(desc(cancer_rate)) %>%
  distinct()

# Bar chart of the first 20 rows of the ranking.
# head(20) replaces the `[0:20, ]` index: R silently ignores the 0, so the
# old code selected rows 1..20 only by accident, and head() also never asks
# for rows beyond the end of the table.
top_cancer %>%
  head(20) %>%
  ggplot(
    aes(x = reorder(country, cancer_rate, na.rm = TRUE), y = cancer_rate)
  ) +
  theme_classic() +
  geom_bar(
    aes(fill = cancer_rate),
    position = "dodge", alpha = 0.8, stat = "identity"
  ) +
  coord_flip() +
  labs(
    y = "cancer rate",
    x = "Country",
    title = "Global Cancer Incidence",
    subtitle = "cancer rate = Num of incidence/100,000",
    fill = "cancer rate",
    caption = "Data from World Cancer Research Fund International"
  )
# Refined grains & cancer rate
# For each food code 1..14, print a 2018 scatter plot of cancer rate against
# intake with a fitted straight line (lm); the panel title is the food name
# from `food_code`.
for (food_id in seq_len(14)) {
  food <- food_code$Food[which(food_code$varnum == food_id)]

  f_cancer <- food_life2018 %>%
    filter(varnum == food_id) %>%
    ggplot(aes(x = intake, y = cancer_rate)) +
    geom_point(size = 1) +
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(x = "intake", y = "Cancer Rate", title = food) +
    theme_classic()

  print(f_cancer)
}
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Warning: Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).
# 2018 scatter plot of cancer rate against intake for one food code, with a
# fitted straight line (lm). Returns the ggplot object.
scatter_cancer_vs_intake <- function(food_varnum, title) {
  food_life2018 %>%
    filter(varnum == !!food_varnum) %>%
    ggplot(aes(x = intake, y = cancer_rate)) +
    geom_point(size = 1) +
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(x = "intake", y = "Cancer Rate", title = title) +
    theme_classic()
}

cancer7 <- scatter_cancer_vs_intake(7, "Refined grains")
cancer2 <- scatter_cancer_vs_intake(2, "Non-starchy vegetables")
cancer1 <- scatter_cancer_vs_intake(1, "Fruits")
cancer3 <- scatter_cancer_vs_intake(3, "Potatoes")
cancer11 <- scatter_cancer_vs_intake(11, "Seafoods")
cancer5 <- scatter_cancer_vs_intake(5, "Beans & legumes")

# Six panels laid out on a 2-column, 3-row grid
figure1 <- ggarrange(
  cancer7, cancer2, cancer1, cancer3, cancer11, cancer5,
  ncol = 2, nrow = 3
)
# Recorded output: for each of the six panels, stat_smooth and geom_point
# each warned that 30 rows with missing / non-finite values were removed.
annotate_figure(
  figure1,
  top = text_grob(
    "Cancer Rate & Food Intake",
    color = "red", face = "bold", size = 14
  )
)
# Browsable intake table: one row per country, year and food, restricted to
# rows where age, female, urban and edu are all 999. `superregion2` is shown
# under the name `continent` and intake is rounded to two decimals.
# A plain mutate() replaces the superseded mutate_at() for this single
# column.
basic_intake <- food_life %>%
  filter(age == 999, female == 999, urban == 999, edu == 999) %>%
  mutate(intake = round(intake, 2)) %>%
  select(continent = superregion2, year, Country, Food, intake) %>%
  arrange(Country)

datatable(
  basic_intake,
  filter = list(position = "top"),
  rownames = FALSE
)
# Browsable life-expectancy / cancer-rate table per country and year,
# restricted to rows where age, female, urban and edu are all 999.
# `superregion2` is shown under the name `continent`; both measures are
# rounded to two decimals.
# food_life holds one row per country, year and food, so after the food
# columns are dropped every country/year line would repeat once per food.
# distinct() collapses those repeats, as the top_life and top_cancer
# rankings already do. across() replaces the superseded mutate_at().
basic_life <- food_life %>%
  filter(age == 999, female == 999, urban == 999, edu == 999) %>%
  mutate(across(c(cancer_rate, life_exp), ~ round(.x, 2))) %>%
  select(continent = superregion2, year, Country, life_exp, cancer_rate) %>%
  distinct() %>%
  arrange(Country)

datatable(
  basic_life,
  filter = list(position = "top"),
  rownames = FALSE
)